home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_300
/
333_01
/
awktab.c
< prev
Wrap
C/C++ Source or Header
|
1989-04-21
|
47KB
|
1,671 lines
/*
* Created by CSD YACC (IBM PC) from "AWKTAB.Y" */
/* #line 31 "AWKTAB.Y" */
#define YYDEBUG 12
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include "awk.h"
/* Forward declarations for the lexer routines used in this file. */
/* yylex reads one token through lexptr; its definition is further down. */
STATIC int NEAR PASCAL yylex(void);
/* NOTE(review): parse_escape is defined outside this view; from its name */
/* and signature it presumably decodes a backslash escape and advances */
/* *string_ptr -- confirm against its definition. */
STATIC int NEAR PASCAL parse_escape(char **string_ptr);
/* The following variable is used for a very sickening thing. The awk */
/* language uses white space as the string concatenation operator, but */
/* having a white space token that would have to appear everywhere in all */
/* the grammar rules would be unbearable. It turns out we can return */
/* CONCAT_OP exactly when there really is one, just from knowing what */
/* kinds of other tokens it can appear between (namely, constants, */
/* variables, or close parentheses). This is because concatenation has */
/* the lowest priority of all operators. want_concat_token is used to */
/* remember that something that could be the left side of a concat has */
/* just been returned. If anyone knows a cleaner way to do this (don't */
/* look at the Un*x code to find one, though), please suggest it. */
/* yylex copies this flag into a local (do_concat) and clears it before */
/* scanning each ordinary token, i.e. after the want_regexp check. */
static int want_concat_token;
/* Two more horrible kludges. The same comment applies to these two too */
/* When want_regexp is nonzero, yylex clears it and scans the input up to */
/* (not including) the closing '/' as a single REGEXP token. */
static int want_regexp = 0; /* lexical scanning kludge */
/* Current line number; yylex increments it when it reads a '\n' token. */
int lineno = 1; /* JF for error msgs */
/* During parsing of a gAWK program, the pointer to the next character */
/* is in this variable. */
/* yylex returns 0 immediately when lexptr is NULL, and sets it to NULL */
/* once it finds the terminating EOS at the start of a token. */
char *lexptr;
/* NOTE(review): lexptr_begin is not referenced in the visible part of */
/* this file; presumably it marks the start of the text being scanned -- */
/* confirm against its users. */
char *lexptr_begin;
/* #line 71 "AWKTAB.Y" */
/* Semantic value carried by a token or grammar symbol. The globals */
/* yylval and yyval declared later in this file have this type. */
typedef union
{
long lval;
/* AWKNUM is declared outside this file (NOTE(review): presumably in */
/* awk.h -- confirm). */
AWKNUM fval;
NODE *nodeval;
int nodetypeval;
/* yylex stores the start of the regexp text here when it returns REGEXP. */
char *sval;
/* Pointer to a PASCAL function taking a NODE * and returning a NODE *; */
/* this is the same type as the ptr member of struct token. */
NODE *(PASCAL *ptrval)(NODE *);
} YYSTYPE;
#define NAME 257
#define REGEXP 258
#define YSTRING 259
#define ERROR 260
#define INCDEC 261
#define NUMBER 262
#define ASSIGNOP 263
#define MATCHOP 264
#define NEWLINE 265
#define CONCAT_OP 266
#define LEX_BEGIN 267
#define LEX_END 268
#define LEX_IF 269
#define LEX_ELSE 270
#define LEX_WHILE 271
#define LEX_FOR 272
#define LEX_BREAK 273
#define LEX_CONTINUE 274
#define LEX_DELETE 275
#define LEX_PRINT 276
#define LEX_PRINTF 277
#define LEX_NEXT 278
#define LEX_EXIT 279
#define RELOP_EQ 280
#define RELOP_GEQ 281
#define RELOP_LEQ 282
#define RELOP_NEQ 283
#define REDIR_APPEND 284
#define LEX_IN 285
#define LEX_AND 286
#define LEX_OR 287
#define INCREMENT 288
#define DECREMENT 289
#define LEX_BUILTIN 290
#define LEX_MATCH_FUNC 291
#define LEX_SUB_FUNC 292
#define LEX_SPLIT_FUNC 293
#define LEX_GETLINE 294
#define UNARY 295
/* Helper macros for grammar actions. */
/* yyclearin sets yychar to -1 and yyerrok sets yyerrflag to 0; both */
/* variables belong to the parser, which is outside this view. */
/* NOTE(review): in conventional yacc usage these discard the lookahead */
/* token and leave error-recovery mode -- confirm against the parser. */
/* The replacement lists are parenthesized so that each macro is a */
/* complete expression: "yyclearin;" behaves exactly as before, and the */
/* macros no longer mis-parse when used inside a larger expression. */
#define yyclearin (yychar = -1)
#define yyerrok (yyerrflag = 0)
/* Default of 150 unless YYMAXDEPTH was already defined before this point. */
/* NOTE(review): presumably the parser stack depth limit -- the parser is */
/* outside this view, confirm there. */
#ifndef YYMAXDEPTH
#define YYMAXDEPTH 150
#endif
/* yylex stores a token's semantic value here (for example sval when it */
/* returns REGEXP). */
YYSTYPE yylval; /*CSD & DECUS LEX */
/* NOTE(review): yyval is not used in the visible part of this file; */
/* presumably it is set by the parser's actions -- confirm. */
YYSTYPE yyval; /*CSD & DECUS LEX */
/* One below the first named token code (NAME is 257). */
/* NOTE(review): presumably the code of yacc's "error" token -- confirm. */
#define YYERRCODE 256
/* #line 713 "AWKTAB.Y" */
/* One entry of the keyword / built-in table tokentab. The member order */
/* must match the positional initializers used in tokentab. */
struct token
{
/* Keyword or built-in function name as written in an awk program. */
char *operator;
/* A NODE_* code (NODE_ILLEGAL, NODE_BUILTIN, NODE_K_*); these codes are */
/* declared outside this file. */
int value;
/* Token code for this name (one of the LEX_* defines). */
int class;
/* Function for a built-in (the do_* routines), or NULL for keywords. */
NODE *(PASCAL *ptr)(NODE *);
};
/* Tokentab is sorted ascii ascending order, so it can be binary searched. */
/* DO NOT enter table entries out of order lest search can't find them. */
/* Each row is { name, NODE_* value, LEX_* class, function or NULL }. */
/* In ASCII, upper-case letters sort before lower-case ones, which is why */
/* "BEGIN" and "END" come first. The routine that searches this table is */
/* not in the visible part of the file. */
static struct token tokentab[] =
{
{ "BEGIN", NODE_ILLEGAL, LEX_BEGIN, NULL },
{ "END", NODE_ILLEGAL, LEX_END, NULL },
{ "atan2", NODE_BUILTIN, LEX_BUILTIN, do_atan2 },
/* "bp" exists only when FAST is not defined. */
#ifndef FAST
{ "bp", NODE_BUILTIN, LEX_BUILTIN, do_bp },
#endif
{ "break", NODE_K_BREAK, LEX_BREAK, NULL },
{ "close", NODE_BUILTIN, LEX_BUILTIN, do_close },
{ "continue", NODE_K_CONTINUE, LEX_CONTINUE, NULL },
{ "cos", NODE_BUILTIN, LEX_BUILTIN, do_cos },
{ "delete", NODE_K_DELETE, LEX_DELETE, NULL },
{ "else", NODE_ILLEGAL, LEX_ELSE, NULL },
{ "exit", NODE_K_EXIT, LEX_EXIT, NULL },
{ "exp", NODE_BUILTIN, LEX_BUILTIN, do_exp },
{ "for", NODE_K_FOR, LEX_FOR, NULL },
/* getline, gsub/sub, match and split get their own token classes */
/* instead of LEX_BUILTIN. */
{ "getline", NODE_BUILTIN, LEX_GETLINE, do_getline },
{ "gsub", NODE_BUILTIN, LEX_SUB_FUNC, do_gsub },
{ "if", NODE_K_IF, LEX_IF, NULL },
{ "in", NODE_ILLEGAL, LEX_IN, NULL },
{ "index", NODE_BUILTIN, LEX_BUILTIN, do_index },
{ "int", NODE_BUILTIN, LEX_BUILTIN, do_int },
{ "length", NODE_BUILTIN, LEX_BUILTIN, do_length },
{ "log", NODE_BUILTIN, LEX_BUILTIN, do_log },
{ "lower", NODE_BUILTIN, LEX_BUILTIN, do_lower },
{ "match", NODE_BUILTIN, LEX_MATCH_FUNC, do_match },
{ "next", NODE_K_NEXT, LEX_NEXT, NULL },
{ "print", NODE_K_PRINT, LEX_PRINT, NULL },
{ "printf", NODE_K_PRINTF, LEX_PRINTF, NULL },
/* "prvars" exists only when FAST is not defined. */
#ifndef FAST
{ "prvars", NODE_BUILTIN, LEX_BUILTIN, do_prvars },
#endif
{ "rand", NODE_BUILTIN, LEX_BUILTIN, do_rand },
{ "reverse", NODE_BUILTIN, LEX_BUILTIN, do_reverse },
{ "sin", NODE_BUILTIN, LEX_BUILTIN, do_sin },
{ "split", NODE_BUILTIN, LEX_SPLIT_FUNC, do_split },
{ "sprintf", NODE_BUILTIN, LEX_BUILTIN, do_sprintf },
{ "sqrt", NODE_BUILTIN, LEX_BUILTIN, do_sqrt },
{ "srand", NODE_BUILTIN, LEX_BUILTIN, do_srand },
{ "sub", NODE_BUILTIN, LEX_SUB_FUNC, do_sub },
{ "substr", NODE_BUILTIN, LEX_BUILTIN, do_substr },
{ "system", NODE_BUILTIN, LEX_BUILTIN, do_system },
{ "upper", NODE_BUILTIN, LEX_BUILTIN, do_upper },
{ "while", NODE_K_WHILE, LEX_WHILE, NULL }
};
/* Read one token, getting characters through lexptr. */
STATIC int NEAR PASCAL yylex(void)
{
register int c;
register int namelen;
register char *tokstart;
static int last_tok_1 = 0;
static int last_tok_2 = 0;
static int did_newline = 0; /* JF the grammar insists that */
/* actions end with newlines. */
auto int do_concat;
auto int seen_e = 0; /* These are for numbers */
auto int seen_point = 0;
auto int next_tab;
retry:
if(!lexptr)
return(0);
if (want_regexp)
{
/* there is a potential bug if a regexp is followed by an equal */
/* sign: "/foo/=bar" would result in assign_quotient being returned */
/* as the next token. Nothing is done about it since it is not */
/* valid awk, but maybe something should be done anyway. */
want_regexp = 0;
tokstart = lexptr;
while (c = *lexptr++)
{
switch (c)
{
case '\\':
if (*lexptr++ == EOS)
{
yyerror ("unterminated regexp ends with \\");
return(ERROR);
}
break;
case '/': /* end of the regexp */
lexptr--;
yylval.sval = tokstart;
return(REGEXP);
case '\n':
case EOS:
yyerror("unterminated regexp");
return(ERROR);
}
}
}
do_concat = want_concat_token;
want_concat_token = 0;
if (*lexptr == EOS)
{
lexptr = NULL;
return(NEWLINE);
}
/* if lexptr is at white space between two terminal tokens or parens, */
/* it is a concatenation operator. */
if (do_concat && (*lexptr == ' ' || *lexptr == '\t'))
{
while (*lexptr == ' ' || *lexptr == '\t')
lexptr++;
if (isalnum(*lexptr) || *lexptr == '\"' || *lexptr == '('
|| *lexptr == '.' || *lexptr == '$')
return(CONCAT_OP);
}
while (*lexptr == ' ' || *lexptr == '\t')
lexptr++;
tokstart = lexptr; /* JF */
last_tok_1 = last_tok_2;
last_tok_2 = *lexptr;
switch (c = *lexptr++)
{
case 0:
return(0);
case '\n':
++lineno;
if (',' == last_tok_1) /* BW: allow lines to be continued */
goto retry; /* at a comma */
return(NEWLINE);
case '#': /* it's a comment */
while (*lexptr != '\n